# delimit;
set more off;
clear;
log using fig5, replace;

*** Death counts: copy violent_ into violent_a, drop populati and violent_,
*** then discard rows with age above 23 or with sex equal to 3;
use data00;
gen violent_a=violent_;
drop populati violent_;
drop if age>23 | sex==3;
sort year dep_ocu age sex;
save one, replace;

*** Population file: retain years up to and including 2000, sorted on the merge keys;
use pop_sex;
drop if year>2000;
sort year dep_ocu age sex;
save pop, replace;

*** Match-merge the death counts (master) with population (using) on
*** year, dep_ocu, age and sex - both files were sorted on these keys above;
use one;
merge year dep_ocu age sex using pop;

*** Zero-fill missing death counts wherever population is not zero.
*** The population test is an if qualifier on each replace so that it is
*** evaluated for every observation (an if command block would test only the
*** first observation in memory and then act on all rows or on none).
*** Rows with missing population also satisfy population!=0 and are zero-filled;
replace death=0 if death==. & population!=0;
replace violent_a=0 if violent_a==. & population!=0;
replace disease=0 if disease==. & population!=0;
replace homicide=0 if homicide==. & population!=0;
replace accident=0 if accident==. & population!=0;

*** Convert the age-group code into newage in steps of five
*** (presumably the lower bound of a five-year age band - confirm against the codebook);
gen newage=(age-7)*5;

***keep only males (sex==1) aged 15-59, according to the SAS file.
***The age filter should still be double checked against that file;
keep if inrange(newage,15,55) & sex==1;

*** Pool pairs of adjacent bands: 20, 30, 40, 50 and 60 are moved down by five
*** so that newage takes the values 15, 25, 35, 45 and 55.
*** The value 60 is listed even though the filter above already excludes it;
replace newage=newage-5 if inlist(newage,20,30,40,50,60);


******************************************
***generate growing department definitions;

*** grow94: 9-department growing definition;
gen grow94=inlist(dep_ocu,13,18,19,50,52,86,95,97,99);

*** plante94: the grow94 departments plus 20, 54 and 94;
gen plante94=(grow94==1 | inlist(dep_ocu,20,54,94));

*** plante94p: the plante94 departments plus 44 and 47 (14 departments in all);
gen plante94p=(plante94==1 | inlist(dep_ocu,44,47));

*** (*) The next line is ACTIVE: grow94 is overwritten with the 14-department
*** definition. Comment it out to use the 9-department definition instead;
replace grow94=plante94p;

*** DMZ flags departments 18 and 50;
gen DMZ=inlist(dep_ocu,18,50);

*** prov_type: Non-growing when grow94 is 0, otherwise DMZ for the flagged
*** departments and Growing for the remaining growing departments;
gen prov_type=cond(grow94==0,"Non-growing",cond(DMZ==1,"DMZ","Growing"));
drop _merge;

***drop departments coded 5, 11 and 76 (noted by the author as Bogota DC, Medellin, Cali);
drop if inlist(dep_ocu,5,11,76);
save temp, replace;

**************************************************;


********************************************************
****collapsing to sum across groups and categories******;
*** Sum the death counts and population within each year and province type,
*** producing one row per year and prov_type, saved as temp1;
use temp;
collapse (sum) death violent_a accident disease homicide population, by(year prov_type);
sort year prov_type;
save temp1, replace;

******because the variable violent has missing values for some year/department/age combinations, to be consistent with
******the way SAS handles missing values in a sum, I collapse 'violent' separately and merge it back in;
******I suspect 'violent' may still not be handled the way it is in the sas file, but it appears irrelevant to the
******subsequent analysis, so I'm leaving it as is;
*** NOTE(review): violent is not created anywhere in this file - presumably it is
*** carried in from data00 alongside violent_a. Confirm that it exists and is not
*** being resolved as an abbreviation of violent_a;

*** Sum violent over the rows where it is non-missing only, saved as temp2;
use temp;
collapse (sum) violent if violent!=., by(year prov_type);
sort year prov_type;
save temp2, replace;

*** Attach the separately collapsed violent totals to the main sums,
*** matching on year and prov_type (both files are sorted on these keys);
use temp1;
sort year prov_type;
merge year prov_type using temp2;
********************************************************;

********************************************************;

*** Indicator for the DMZ province type;
gen DMZ=(prov_type=="DMZ");

*** Cause-specific death rates per 100000 population;
gen arate=100000*(accident/population);
gen vrate=100000*(violent_a/population);
gen drate=100000*(disease/population);
gen hrate=100000*(homicide/population);

*** Log transforms, computed only where the relevant count is positive.
*** lnvratio is the log odds of violent versus non-violent deaths and also
*** requires violent_a to be smaller than death;
gen lnvrate=ln(violent_a/population) if violent_a>0;
gen lnvratio=ln(violent_a/(death-violent_a)) if violent_a>0 & violent_a<death;
gen lndrate=ln(disease/population) if disease>0;

*** Period variables: post flags 1995 onwards, negyear is minus the year from
*** 1993 onwards and zero before, d1990 flags the year 1990;
gen post=(year>=1995);
gen negyear=cond(year>=1993,-year,0);
gen d1990=(year==1990);


label variable lnvratio "logit violent death rate";
label variable vrate "violent death rate";
label variable drate "disease death rate";
label variable DMZ "Meta or Cauqeta";

***"remove base period levels" - i.e. subtract from lnvratio its mean within
***each prov_type (the mean is taken over all years present);

bysort prov_type: egen mean_lnvratio=mean(lnvratio);
replace lnvratio=lnvratio-mean_lnvratio;

**************************************************;
*****            data summary                *****;
**************************************************;
*** Summary statistics for every variable, reported separately by province type;
bysort prov_type: summarize;

**************************************************;
*****              Figure 5                 *****;
**************************************************;

*** Plot the demeaned lnvratio against year as three connected series, one per
*** prov_type. DMZ and Non-growing use invisible markers (msymbol(i)) so only the
*** connecting line shows, Growing adds circle markers, and Non-growing is drawn
*** with a dashed line. Dotted reference lines are placed at 0, .1 and -.1.
*** The graph is kept in memory under the name fig5 and also written to disk
*** through the saving(fig5, ...) option;
twoway 
	(scatter lnvratio year if prov_type=="DMZ", msymbol(i) sort c(l) clc(black) clw(medium))
	(scatter lnvratio year if prov_type=="Growing", msymbol(o) mcolor(black) sort c(l) clc(black) clw(medium))
	(scatter lnvratio year if prov_type=="Non-growing", msymbol(i) sort c(l) clp("-") clc(black) clw(medium)),
	
	yline(0 .1 -.1, lpattern(dot) lc(black))
	ytitle("Death rates", si(small))
	ylabel(-.4(.1).5, labs(small) nogrid)
	xlabel(1990(1)2000, labs(small))
	xtitle("Year", si(small))
	legend(row(1) label(1 "DMZ") label(2 "Growing") label(3 "Non-growing") subtitle("Province Type", si(small)) si(small))
	title("Fig.5. Death Rates - Logit(Violence/Total) -- for Men Aged 15 - 59", si(small))
      note("1. Logits, relative to average by province type" "2. Non-growing omits Antioquia, Valle, and Bogota DC.")
	name(fig5, replace)
	saving(fig5, asis replace)	
;



*** Delete the intermediate datasets written by this script, then close the log;
foreach stem in temp temp1 temp2 one pop {;
	erase `stem'.dta;
};
log close;

